clear all
set more off
cap log close

********************************************************************************
***** Project: The Short and Long Term Effects of In-Person Performance Feedback
********************************************************************************
***** A. R. Soetevent & G. J. Romensen
********************************************************************************
***** Preparing Dataset for Analysis
********************************************************************************
***** Latest update: 18-01-2020
********************************************************************************
*** WARNING: TAKES CONSIDERABLE TIME TO RUN.
********************************************************************************
global filepath "C:\JPEMicReplication"
log using "$filepath/Logs/06Preparingforanalysis.log", replace


********************************************************************************
*** DATA FRIESLAND 
********************************************************************************
*** 1. Determine driver-specific post-feedback period

local i=0

while `i'<=11 {
import excel "$filepath\Data\Using databases\feedback_`i'.xlsx", firstrow
keep if total_score!=.
levelsof driver_id, local(yesfeed_`i')
global yesfeed_`i' `yesfeed_`i''
clear
local i=`i'+1
}

use "$filepath\ConstructedData\cleaned_tripdata_1501 tm 1701.dta"

local i=0

while `i'<=11 {
gen yesfeed_`i'=0
foreach x of global yesfeed_`i' {
qui replace yesfeed_`i'=1 if chauf_nr_rug==`x'
}
local i=`i'+1
}

gen post_0=cond(datum>=date("15-12-2015", "DMY"),1,0)

local i=1

while `i'<=11 {
gen post_`i'=cond(datum>=date("15-`i'-2016", "DMY") ,1,0)
local i=`i'+1
}
 
gen postfeedback=0
foreach x of numlist 0(1)10 {
qui replace postfeedback=1 if post_`x'==1 & postfeedback==0 & yesfeed_`x'==1
}

gen noreport=cond(post_0==1 & postfeedback==0,1,0)
gen nonotification=cond(post_11==1 & yesfeed_11==0,1,0)

*** 2. Determine driver-specific feedback rounds

levelsof chauf_nr_rug, local(level_driver)

gen feedround=0

local i=0

while `i'<=10 {
foreach x of local level_driver {
qui sum yesfeed_`i' if chauf_nr_rug==`x'
local `i'_`x'=r(mean)
qui sum feedround if chauf_nr_rug==`x' & post_`i'==1
local feedround_`i'_`x'=r(mean)
qui replace feedround=`feedround_`i'_`x''+``i'_`x'' if chauf_nr_rug==`x' & post_`i'==1
}
local i=`i'+1
}

*** 3. Determine post-announcement and post-experiment period

gen postannouncement=cond(datum>=date("9-11-2015", "DMY"),1,0)
gen postexperiment=cond(datum>=date("15-11-2016", "DMY") & yesfeed_11==1,1,0)

*** 4. Merging using databases with master-file 

sort chauf_nr_rug
joinby chauf_nr_rug using "$filepath\Data\Using databases\coachlogs.dta", unmatched(master)
drop _merge
joinby chauf_nr_rug using "$filepath\Data\Using databases\Overallfeedback.dta", unmatched(master)
drop _merge
joinby chauf_nr_rug using "$filepath\Data\Using databases\Medewerkers.dta", unmatched(master)
drop _merge

sort datum
joinby datum using "$filepath\Data\Using databases\Weather.dta", unmatched(master)
drop _merge

sort chauf_nr_rug datum

*** 5. Construct control variables

gen postcoaching=0
bysort chauf_nr_rug: replace postcoaching=cond(datum >= coachdatum_1,1,0)

gen schooltraject=cond(lijn_nr>=600 & lijn_nr<=699,1,0)
gen weekendrit=cond(rit_nr>=6000 & rit_nr<=7999,1,0)
gen vakantierit=cond(rit_nr>=8000 & rit_nr<=8999,1,0)
gen uitleenrit=cond(ves_naam!=ves_naam_chauf,1,0)
gen stadsrit=cond(traject=="Stad",1,0)

gen byte vdl10=cond(strpos(voertuig_omschrijving, "ALE"),1,0)
gen byte vdl14=cond(strpos(voertuig_omschrijving, "XLE"),1,0)
gen byte intouro=cond(strpos(voertuig_omschrijving, "INTOURO"),1,0)
gen byte iris10=cond(strpos(voertuig_omschrijving, "10,5 M"),1,0)
gen byte daf=cond(strpos(voertuig_omschrijving, "DAF"),1,0)
gen byte vvo89= cond(strpos(voertuig_omschrijving, "8900"),1,0)
gen byte vvo77H=cond(strpos(voertuig_omschrijving, "Volvo 7700 B5 LH Hybrid"),1,0)


gen departurehour=hh(werkelijke_begintijd_rit)
gen ochtendspits=cond(departurehour>=7 & departurehour<=9,1,0)
gen avondspits=cond(departurehour>=16 & departurehour<=18,1,0)
drop departurehour

replace rijden_afstand=rijden_afstand_dp if rijden_afstand==.

rename cnt_ci ovcheckins	

*** 6. Reduce number of columns. 

drop commentaar_in_* plaatsnaam_begin plaatsnaam_eind post_* yesfeed_* omloop_nr
 
*** 7. Save dataset for analysis. 

save "$filepath\ConstructedData\analysis_tripdata_1501 tm 1701.dta", replace

********************************************************************************
*** DATA ZUID HOLLAND
********************************************************************************
clear all

use "$filepath\ConstructedData\cleaned_ZHDAV_1507 tm 1712.dta"


*** 5. Construct control variables
gen postcoaching=0
bysort driverid: replace postcoaching=cond(datum >= coachdatum_1,1,0)

gen dvdw = dow(datum)
tab dvdw
tab rit_nr if dvdw==0 | dvdw==6
tab dvdw if rit_nr >5000 & rit_nr<9000
tab dvdw if rit_nr >8000 & rit_nr<9000
tab dvdw if rit_nr >5000 & rit_nr<8000
tab dvdw if rit_nr <5000
tab dvdw if rit_nr >9000
tab datum if rit_nr >8000 & rit_nr<9000
drop dvdw 

** Clearly also in ZH rit_nr between 5000-8000 is weekend; 8000-9000 is holiday, just as in FR
gen weekendrit=cond(rit_nr>=5000 & rit_nr<=7999,1,0)
gen vakantierit=cond(rit_nr>=8000 & rit_nr<=8999,1,0)

gen daf=cond(strpos(voertuig_omschrijving, "DAF"),1,0)
gen vdl10=cond(strpos(voertuig_omschrijving, "ALE"),1,0)
gen vdl12=cond(strpos(voertuig_omschrijving, "120"),1,0)
gen vdl14=cond(strpos(voertuig_omschrijving, "XLE"),1,0)
gen iris10=cond(strpos(voertuig_omschrijving, "10,5 M"),1,0)
gen vvo89= cond(strpos(voertuig_omschrijving, "8900"),1,0)
gen vvo77H=cond(strpos(voertuig_omschrijving, "Volvo 7700 B5 LH Hybrid"),1,0)

gen departurehour=hh(werkelijke_begintijd_rit)
gen ochtendspits=cond(departurehour>=7 & departurehour<=9 & departurehour!=.,1,0)
gen avondspits=cond(departurehour>=16 & departurehour<=18 & departurehour!=.,1,0)
drop departurehour

replace rijden_afstand=rijden_afstand_dp if rijden_afstand==. 

tab ves_naam
tab ves_naam_chauf

local location "Alblasserdam Alphen Gouda Krimpen Leiden Lisse Schelluinen Dordrecht"

foreach var of local location {
replace ves_naam="`var'" if strpos(ves_naam, "`var'")
}

tab ves_naam

gen uitleenrit=cond(ves_naam!=ves_naam_chauf,1,0)

tab uitleenrit

egen lijn_nr_unique=group(ves_naam lijn_nr)

rename cnt_ci ovcheckins

*** 6. Reduce number of columns. 

drop plaatsnaam_begin plaatsnaam_eind  omloop_nr

/*** 7. Save the data for analysis ***/

save "$filepath\ConstructedData\analysis_ZHDAV_1507 tm 1712.dta", replace

